longitudinal analysis
ppmi_longit<-read.csv("ppmi_top_wide_UPDRS.csv",header=TRUE)
clinical<-read.csv("ppmi_clinical.csv",header=TRUE)
merged_dataset<-merge(ppmi_longit,clinical,by="FID_IID",all.x=TRUE)
ppmi_long<-merged_dataset[,c(1:342,351:354,364,365,347)]
Longtitudinal analysis with different time points of UPDRS measurements
# use complete dataset with the result
DataMissOm<-na.omit(subset(ppmi_long,select=c(time_visit,PD,FID_IID,COGSTATE,COGDECLN,FNCDTCOG,COGDXCL,EDUCYRS,L_superior_parietal_gyrus_ComputeArea
,L_superior_parietal_gyrus_Volume,R_superior_parietal_gyrus_ComputeArea,R_superior_parietal_gyrus_Volume,L_putamen_ComputeArea,L_putamen_Volume,R_putamen_Volume
,R_putamen_ShapeIndex,L_caudate_ComputeArea,L_caudate_Volume,R_caudate_ComputeArea,R_caudate_Volume,chr12_rs34637584_GT
,chr17_rs11868035_GT,chr17_rs11012_GT,chr17_rs393152_GT,chr17_rs12185268_GT,chr17_rs199533_GT,Sex,Weight,Age,UPDRS_part_I,UPDRS_part_II,UPDRS_part_III)))
#normalize continuous variables
for(i in c(8:20,28:32)){
DataMissOm[,i]<-scale(DataMissOm[,i])
}
GEE
library(geepack)
DataMissOm$COGSTATE<-as.numeric(DataMissOm$COGSTATE)
DataMissOm$COGDECLN<-as.numeric(DataMissOm$COGDECLN)
DataMissOm$FNCDTCOG<-as.numeric(DataMissOm$FNCDTCOG)
DataMissOm$COGDXCL<-as.numeric(DataMissOm$COGDXCL)
model_gee_UPDRS<-geeglm(PD~L_superior_parietal_gyrus_ComputeArea
+L_superior_parietal_gyrus_Volume+R_superior_parietal_gyrus_ComputeArea+R_superior_parietal_gyrus_Volume+L_putamen_ComputeArea+L_putamen_Volume+R_putamen_Volume
+R_putamen_ShapeIndex+L_caudate_ComputeArea+L_caudate_Volume+R_caudate_ComputeArea+R_caudate_Volume+chr12_rs34637584_GT
+chr17_rs11868035_GT+chr17_rs11012_GT+chr17_rs393152_GT+chr17_rs12185268_GT+chr17_rs199533_GT+Sex+Weight+Age
+UPDRS_part_I+UPDRS_part_II+UPDRS_part_III+FID_IID+COGSTATE+COGDECLN+FNCDTCOG+COGDXCL+EDUCYRS,
data=DataMissOm,waves=time_visit,family="binomial",id=FID_IID,corstr="exchangeable",scale.fix=TRUE)
summary(model_gee_UPDRS)
##
## Call:
## geeglm(formula = PD ~ L_superior_parietal_gyrus_ComputeArea +
## L_superior_parietal_gyrus_Volume + R_superior_parietal_gyrus_ComputeArea +
## R_superior_parietal_gyrus_Volume + L_putamen_ComputeArea +
## L_putamen_Volume + R_putamen_Volume + R_putamen_ShapeIndex +
## L_caudate_ComputeArea + L_caudate_Volume + R_caudate_ComputeArea +
## R_caudate_Volume + chr12_rs34637584_GT + chr17_rs11868035_GT +
## chr17_rs11012_GT + chr17_rs393152_GT + chr17_rs12185268_GT +
## chr17_rs199533_GT + Sex + Weight + Age + UPDRS_part_I + UPDRS_part_II +
## UPDRS_part_III + FID_IID + COGSTATE + COGDECLN + FNCDTCOG +
## COGDXCL + EDUCYRS, family = "binomial", data = DataMissOm,
## id = FID_IID, waves = time_visit, corstr = "exchangeable",
## scale.fix = TRUE)
##
## Coefficients:
## Estimate Std.err Wald
## (Intercept) 1.787e+17 6.714e+14 70831.65
## L_superior_parietal_gyrus_ComputeArea -4.700e+17 2.633e+15 31867.29
## L_superior_parietal_gyrus_Volume 5.226e+17 2.836e+15 33961.30
## R_superior_parietal_gyrus_ComputeArea 3.618e+17 1.968e+15 33785.88
## R_superior_parietal_gyrus_Volume -4.222e+17 2.330e+15 32820.75
## L_putamen_ComputeArea -1.330e+17 9.865e+14 18176.48
## L_putamen_Volume 2.165e+17 1.166e+15 34476.22
## R_putamen_Volume -7.282e+16 4.836e+14 22677.60
## R_putamen_ShapeIndex -3.817e+15 1.923e+14 393.72
## L_caudate_ComputeArea -2.169e+17 1.567e+15 19144.96
## L_caudate_Volume 1.112e+17 1.159e+15 9199.90
## R_caudate_ComputeArea 9.570e+16 1.045e+15 8379.14
## R_caudate_Volume 1.932e+16 7.656e+14 636.44
## chr12_rs34637584_GT -1.266e+17 1.230e+15 10595.16
## chr17_rs11868035_GT 2.584e+16 3.206e+14 6497.19
## chr17_rs11012_GT 2.435e+15 6.828e+14 12.71
## chr17_rs393152_GT 1.212e+16 7.579e+14 255.77
## chr17_rs12185268_GT -1.241e+17 1.290e+15 9252.73
## chr17_rs199533_GT 7.307e+16 8.266e+14 7814.16
## Sex 5.194e+16 6.038e+14 7398.75
## Weight -1.305e+16 2.352e+14 3075.64
## Age -1.208e+16 2.022e+14 3568.28
## UPDRS_part_I 1.019e+16 1.301e+14 6133.81
## UPDRS_part_II 2.386e+15 2.648e+14 81.22
## UPDRS_part_III 5.018e+16 3.686e+14 18537.46
## FID_IID -3.435e+13 3.649e+11 8860.45
## COGSTATE 5.995e+15 2.795e+14 460.15
## COGDECLN -2.453e+16 3.557e+14 4753.03
## FNCDTCOG -1.571e+17 7.565e+14 43103.79
## COGDXCL 1.578e+16 6.795e+14 539.41
## EDUCYRS 3.086e+16 2.071e+14 22193.59
## Pr(>|W|)
## (Intercept) < 2e-16 ***
## L_superior_parietal_gyrus_ComputeArea < 2e-16 ***
## L_superior_parietal_gyrus_Volume < 2e-16 ***
## R_superior_parietal_gyrus_ComputeArea < 2e-16 ***
## R_superior_parietal_gyrus_Volume < 2e-16 ***
## L_putamen_ComputeArea < 2e-16 ***
## L_putamen_Volume < 2e-16 ***
## R_putamen_Volume < 2e-16 ***
## R_putamen_ShapeIndex < 2e-16 ***
## L_caudate_ComputeArea < 2e-16 ***
## L_caudate_Volume < 2e-16 ***
## R_caudate_ComputeArea < 2e-16 ***
## R_caudate_Volume < 2e-16 ***
## chr12_rs34637584_GT < 2e-16 ***
## chr17_rs11868035_GT < 2e-16 ***
## chr17_rs11012_GT 0.000363 ***
## chr17_rs393152_GT < 2e-16 ***
## chr17_rs12185268_GT < 2e-16 ***
## chr17_rs199533_GT < 2e-16 ***
## Sex < 2e-16 ***
## Weight < 2e-16 ***
## Age < 2e-16 ***
## UPDRS_part_I < 2e-16 ***
## UPDRS_part_II < 2e-16 ***
## UPDRS_part_III < 2e-16 ***
## FID_IID < 2e-16 ***
## COGSTATE < 2e-16 ***
## COGDECLN < 2e-16 ***
## FNCDTCOG < 2e-16 ***
## COGDXCL < 2e-16 ***
## EDUCYRS < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Scale is fixed.
##
## Correlation: Structure = exchangeable Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 2.217e+15 1.086e+30
## Number of clusters: 406 Maximum cluster size: 12
GLMM
library(lme4)
## Loading required package: Matrix
model_lmm_UPDRS<-glmer(PD~L_superior_parietal_gyrus_ComputeArea
+L_superior_parietal_gyrus_Volume+R_superior_parietal_gyrus_ComputeArea+R_superior_parietal_gyrus_Volume+L_putamen_ComputeArea+L_putamen_Volume+R_putamen_Volume
+R_putamen_ShapeIndex+L_caudate_ComputeArea+L_caudate_Volume+R_caudate_ComputeArea+R_caudate_Volume+chr12_rs34637584_GT
+chr17_rs11868035_GT+chr17_rs11012_GT+chr17_rs393152_GT+chr17_rs12185268_GT+chr17_rs199533_GT+Sex+Weight+Age+UPDRS_part_I+UPDRS_part_II+UPDRS_part_III+FID_IID+COGSTATE+COGDECLN+FNCDTCOG+COGDXCL+EDUCYRS
+(1|FID_IID),control = glmerControl(optCtrl=list(maxfun=20000)),
data=DataMissOm,family="binomial")
summary(model_lmm_UPDRS)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## PD ~ L_superior_parietal_gyrus_ComputeArea + L_superior_parietal_gyrus_Volume +
## R_superior_parietal_gyrus_ComputeArea + R_superior_parietal_gyrus_Volume +
## L_putamen_ComputeArea + L_putamen_Volume + R_putamen_Volume +
## R_putamen_ShapeIndex + L_caudate_ComputeArea + L_caudate_Volume +
## R_caudate_ComputeArea + R_caudate_Volume + chr12_rs34637584_GT +
## chr17_rs11868035_GT + chr17_rs11012_GT + chr17_rs393152_GT +
## chr17_rs12185268_GT + chr17_rs199533_GT + Sex + Weight +
## Age + UPDRS_part_I + UPDRS_part_II + UPDRS_part_III + FID_IID +
## COGSTATE + COGDECLN + FNCDTCOG + COGDXCL + EDUCYRS + (1 |
## FID_IID)
## Data: DataMissOm
## Control: glmerControl(optCtrl = list(maxfun = 20000))
##
## AIC BIC logLik deviance df.resid
## 193.8 380.9 -64.9 129.8 2530
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.682 0.000 0.000 0.000 0.706
##
## Random effects:
## Groups Name Variance Std.Dev.
## FID_IID (Intercept) 36.3 6.03
## Number of obs: 2562, groups: FID_IID, 406
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) -7.82e+01 6.52e+04 0.00
## L_superior_parietal_gyrus_ComputeArea -1.94e+00 6.84e+00 -0.28
## L_superior_parietal_gyrus_Volume 2.34e+00 7.22e+00 0.32
## R_superior_parietal_gyrus_ComputeArea -1.36e-01 6.28e+00 -0.02
## R_superior_parietal_gyrus_Volume 1.31e+00 6.66e+00 0.20
## L_putamen_ComputeArea -3.63e+00 5.75e+00 -0.63
## L_putamen_Volume 2.84e+00 5.15e+00 0.55
## R_putamen_Volume -2.95e-01 3.41e+00 -0.09
## R_putamen_ShapeIndex -2.06e-01 1.35e+00 -0.15
## L_caudate_ComputeArea -6.77e+00 7.93e+00 -0.85
## L_caudate_Volume 3.38e+00 7.58e+00 0.45
## R_caudate_ComputeArea 5.30e+00 8.53e+00 0.62
## R_caudate_Volume -2.08e+00 7.78e+00 -0.27
## chr12_rs34637584_GT -4.51e+00 4.94e+02 -0.01
## chr17_rs11868035_GT -4.92e-01 1.65e+00 -0.30
## chr17_rs11012_GT 7.34e-01 3.40e+00 0.22
## chr17_rs393152_GT 2.92e+00 3.43e+00 0.85
## chr17_rs12185268_GT 3.02e+00 5.76e+00 0.52
## chr17_rs199533_GT -2.90e+00 4.92e+00 -0.59
## Sex 1.09e+00 2.73e+00 0.40
## Weight 1.88e+00 1.35e+00 1.40
## Age 1.83e+00 1.33e+00 1.38
## UPDRS_part_I 3.29e-01 1.70e+00 0.19
## UPDRS_part_II -1.28e+01 4.02e+00 -3.19
## UPDRS_part_III -2.09e+01 5.74e+00 -3.65
## FID_IID -1.60e-03 3.31e-03 -0.48
## COGSTATE 9.42e+00 8.01e+00 1.18
## COGDECLN -4.48e+00 4.12e+00 -1.09
## FNCDTCOG 8.71e+00 6.52e+04 0.00
## COGDXCL -4.27e+00 5.06e+00 -0.84
## EDUCYRS 3.68e-02 1.29e+00 0.03
## Pr(>|z|)
## (Intercept) 0.99904
## L_superior_parietal_gyrus_ComputeArea 0.77698
## L_superior_parietal_gyrus_Volume 0.74594
## R_superior_parietal_gyrus_ComputeArea 0.98267
## R_superior_parietal_gyrus_Volume 0.84431
## L_putamen_ComputeArea 0.52797
## L_putamen_Volume 0.58167
## R_putamen_Volume 0.93113
## R_putamen_ShapeIndex 0.87846
## L_caudate_ComputeArea 0.39352
## L_caudate_Volume 0.65557
## R_caudate_ComputeArea 0.53451
## R_caudate_Volume 0.78918
## chr12_rs34637584_GT 0.99272
## chr17_rs11868035_GT 0.76636
## chr17_rs11012_GT 0.82913
## chr17_rs393152_GT 0.39461
## chr17_rs12185268_GT 0.60026
## chr17_rs199533_GT 0.55589
## Sex 0.68982
## Weight 0.16203
## Age 0.16687
## UPDRS_part_I 0.84611
## UPDRS_part_II 0.00140 **
## UPDRS_part_III 0.00026 ***
## FID_IID 0.62892
## COGSTATE 0.23984
## COGDECLN 0.27759
## FNCDTCOG 0.99989
## COGDXCL 0.39832
## EDUCYRS 0.97715
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 31 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## fit warnings:
## Some predictor variables are on very different scales: consider rescaling
## convergence code: 0
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## failure to converge in 20000 evaluations
length(unique(DataMissOm$FID_IID)) #406 patients
## [1] 406
nrow(DataMissOm) #2562 observations
## [1] 2562
Longtitudinal analysis with different time point of brain imaging measures
ppmi_base<-read.csv("ppmi_ROI_baseline.csv",header=TRUE)
ppmi_base<-ppmi_base[,c(2,24:26,29:33)] #may add 27 28
ppmi_new<-read.csv("PPMI_GSA_clinical.csv",header=TRUE)
ppmi_imaging<-ppmi_new[,1:335]
merged_dataset<-merge(ppmi_imaging,ppmi_base,by="FID_IID",all.x=TRUE)
DataMissOm<-na.omit(subset(merged_dataset,select=c(VisitID,ResearchGroup,FID_IID,COGSTATE,COGDECLN,FNCDTCOG,COGDXCL,EDUCYRS,L_superior_parietal_gyrus_ComputeArea
,L_superior_parietal_gyrus_Volume,R_superior_parietal_gyrus_ComputeArea,R_superior_parietal_gyrus_Volume,L_putamen_ComputeArea,L_putamen_Volume,R_putamen_Volume
,R_putamen_ShapeIndex,L_caudate_ComputeArea,L_caudate_Volume,R_caudate_ComputeArea,R_caudate_Volume,chr12_rs34637584_GT
,chr17_rs11868035_GT,chr17_rs11012_GT,chr17_rs393152_GT,chr17_rs12185268_GT,chr17_rs199533_GT,Sex,Weight,Age,UPDRS_Part_I_Summary_Score_Baseline,UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline,UPDRS_Part_III_Summary_Score_Baseline)))
#normalize continuous variables
for(i in c(9:20,28:32)){
DataMissOm[,i]<-scale(DataMissOm[,i])
}
library(geepack)
DataMissOm$COGSTATE<-as.numeric(DataMissOm$COGSTATE)
DataMissOm$COGDECLN<-as.numeric(DataMissOm$COGDECLN)
DataMissOm$FNCDTCOG<-as.numeric(DataMissOm$FNCDTCOG)
DataMissOm$COGDXCL<-as.numeric(DataMissOm$COGDXCL)
DataMissOm$PD<-ifelse(DataMissOm$ResearchGroup=="Control",0,1)
model_gee_imaging<-geeglm(PD~L_superior_parietal_gyrus_ComputeArea
+L_superior_parietal_gyrus_Volume+R_superior_parietal_gyrus_ComputeArea+R_superior_parietal_gyrus_Volume+L_putamen_ComputeArea+L_putamen_Volume+R_putamen_Volume
+R_putamen_ShapeIndex+L_caudate_ComputeArea+L_caudate_Volume+R_caudate_ComputeArea+R_caudate_Volume+chr12_rs34637584_GT
+chr17_rs11868035_GT+chr17_rs11012_GT+chr17_rs393152_GT+chr17_rs12185268_GT+chr17_rs199533_GT+Sex+Weight+Age
+UPDRS_Part_I_Summary_Score_Baseline+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline+UPDRS_Part_III_Summary_Score_Baseline+FID_IID+COGSTATE+COGDECLN+FNCDTCOG+COGDXCL+EDUCYRS,
data=DataMissOm,waves=VisitID,family="binomial",id=FID_IID,corstr="exchangeable",scale.fix=TRUE)
summary(model_gee_imaging)
##
## Call:
## geeglm(formula = PD ~ L_superior_parietal_gyrus_ComputeArea +
## L_superior_parietal_gyrus_Volume + R_superior_parietal_gyrus_ComputeArea +
## R_superior_parietal_gyrus_Volume + L_putamen_ComputeArea +
## L_putamen_Volume + R_putamen_Volume + R_putamen_ShapeIndex +
## L_caudate_ComputeArea + L_caudate_Volume + R_caudate_ComputeArea +
## R_caudate_Volume + chr12_rs34637584_GT + chr17_rs11868035_GT +
## chr17_rs11012_GT + chr17_rs393152_GT + chr17_rs12185268_GT +
## chr17_rs199533_GT + Sex + Weight + Age + UPDRS_Part_I_Summary_Score_Baseline +
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline +
## UPDRS_Part_III_Summary_Score_Baseline + FID_IID + COGSTATE +
## COGDECLN + FNCDTCOG + COGDXCL + EDUCYRS, family = "binomial",
## data = DataMissOm, id = FID_IID, waves = VisitID, corstr = "exchangeable",
## scale.fix = TRUE)
##
## Coefficients:
## Estimate
## (Intercept) 3.95e+02
## L_superior_parietal_gyrus_ComputeArea 2.21e+02
## L_superior_parietal_gyrus_Volume -2.66e+02
## R_superior_parietal_gyrus_ComputeArea 1.19e+01
## R_superior_parietal_gyrus_Volume 4.60e+01
## L_putamen_ComputeArea 1.77e+02
## L_putamen_Volume -4.79e+01
## R_putamen_Volume -1.25e+02
## R_putamen_ShapeIndex 1.95e+01
## L_caudate_ComputeArea -3.39e+01
## L_caudate_Volume 1.78e+02
## R_caudate_ComputeArea -3.14e+01
## R_caudate_Volume -9.44e+01
## chr12_rs34637584_GT -5.51e+02
## chr17_rs11868035_GT -7.33e+01
## chr17_rs11012_GT 1.02e+02
## chr17_rs393152_GT -2.71e+02
## chr17_rs12185268_GT 1.61e+02
## chr17_rs199533_GT -2.52e+01
## Sex 1.35e+02
## Weight 4.50e+01
## Age -5.73e+01
## UPDRS_Part_I_Summary_Score_Baseline -1.20e+02
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline 6.32e+02
## UPDRS_Part_III_Summary_Score_Baseline 5.96e+02
## FID_IID 1.56e-01
## COGSTATE -6.57e+01
## COGDECLN 4.47e+02
## FNCDTCOG -8.99e+02
## COGDXCL 9.39e+01
## EDUCYRS 9.21e+00
## Std.err
## (Intercept) 4.34e+00
## L_superior_parietal_gyrus_ComputeArea 3.06e+00
## L_superior_parietal_gyrus_Volume 3.17e+00
## R_superior_parietal_gyrus_ComputeArea 1.48e+00
## R_superior_parietal_gyrus_Volume 1.38e+00
## L_putamen_ComputeArea 1.22e+00
## L_putamen_Volume 9.56e-01
## R_putamen_Volume 1.15e+00
## R_putamen_ShapeIndex 5.92e-01
## L_caudate_ComputeArea 2.33e+00
## L_caudate_Volume 2.10e+00
## R_caudate_ComputeArea 1.74e+00
## R_caudate_Volume 1.90e+00
## chr12_rs34637584_GT 9.80e-01
## chr17_rs11868035_GT 4.87e-01
## chr17_rs11012_GT 1.14e+00
## chr17_rs393152_GT 9.68e-01
## chr17_rs12185268_GT 1.51e+00
## chr17_rs199533_GT 8.56e-01
## Sex 9.16e-01
## Weight 3.21e-01
## Age 3.02e-01
## UPDRS_Part_I_Summary_Score_Baseline 3.93e-01
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline 3.20e-01
## UPDRS_Part_III_Summary_Score_Baseline 3.99e-01
## FID_IID 9.25e-04
## COGSTATE 1.15e+00
## COGDECLN 1.58e+00
## FNCDTCOG 2.18e+00
## COGDXCL 1.48e+00
## EDUCYRS 1.02e-01
## Wald
## (Intercept) 8.27e+03
## L_superior_parietal_gyrus_ComputeArea 5.24e+03
## L_superior_parietal_gyrus_Volume 7.06e+03
## R_superior_parietal_gyrus_ComputeArea 6.43e+01
## R_superior_parietal_gyrus_Volume 1.11e+03
## L_putamen_ComputeArea 2.12e+04
## L_putamen_Volume 2.51e+03
## R_putamen_Volume 1.18e+04
## R_putamen_ShapeIndex 1.09e+03
## L_caudate_ComputeArea 2.11e+02
## L_caudate_Volume 7.15e+03
## R_caudate_ComputeArea 3.26e+02
## R_caudate_Volume 2.46e+03
## chr12_rs34637584_GT 3.17e+05
## chr17_rs11868035_GT 2.27e+04
## chr17_rs11012_GT 8.00e+03
## chr17_rs393152_GT 7.83e+04
## chr17_rs12185268_GT 1.15e+04
## chr17_rs199533_GT 8.69e+02
## Sex 2.17e+04
## Weight 1.97e+04
## Age 3.59e+04
## UPDRS_Part_I_Summary_Score_Baseline 9.24e+04
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline 3.91e+06
## UPDRS_Part_III_Summary_Score_Baseline 2.24e+06
## FID_IID 2.83e+04
## COGSTATE 3.26e+03
## COGDECLN 8.00e+04
## FNCDTCOG 1.70e+05
## COGDXCL 4.03e+03
## EDUCYRS 8.17e+03
## Pr(>|W|)
## (Intercept) < 2e-16 ***
## L_superior_parietal_gyrus_ComputeArea < 2e-16 ***
## L_superior_parietal_gyrus_Volume < 2e-16 ***
## R_superior_parietal_gyrus_ComputeArea 1.1e-15 ***
## R_superior_parietal_gyrus_Volume < 2e-16 ***
## L_putamen_ComputeArea < 2e-16 ***
## L_putamen_Volume < 2e-16 ***
## R_putamen_Volume < 2e-16 ***
## R_putamen_ShapeIndex < 2e-16 ***
## L_caudate_ComputeArea < 2e-16 ***
## L_caudate_Volume < 2e-16 ***
## R_caudate_ComputeArea < 2e-16 ***
## R_caudate_Volume < 2e-16 ***
## chr12_rs34637584_GT < 2e-16 ***
## chr17_rs11868035_GT < 2e-16 ***
## chr17_rs11012_GT < 2e-16 ***
## chr17_rs393152_GT < 2e-16 ***
## chr17_rs12185268_GT < 2e-16 ***
## chr17_rs199533_GT < 2e-16 ***
## Sex < 2e-16 ***
## Weight < 2e-16 ***
## Age < 2e-16 ***
## UPDRS_Part_I_Summary_Score_Baseline < 2e-16 ***
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline < 2e-16 ***
## UPDRS_Part_III_Summary_Score_Baseline < 2e-16 ***
## FID_IID < 2e-16 ***
## COGSTATE < 2e-16 ***
## COGDECLN < 2e-16 ***
## FNCDTCOG < 2e-16 ***
## COGDXCL < 2e-16 ***
## EDUCYRS < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Scale is fixed.
##
## Correlation: Structure = exchangeable Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 3.68e-07 2.62e-06
## Number of clusters: 423 Maximum cluster size: 3
baseline analysis
ppmi_1<-ppmi[ppmi$VisitID==1,]
ppmi_1_complete<-ppmi_1[,c(1,
3:282, # imaging index
283:284,287, #demographic index
288,291,294,297,300,303, #genotype
336,348,360,384,392, #UPDRS+asessment non motor may delete 372
400:404, #clinical index
405:406)]
ppmi_1_complete$ind<-is.na(ppmi_1_complete$chr17_rs199533_GT)
ppmi_1_complete<-ppmi_1_complete[ppmi_1_complete$ind==FALSE,-303]
ppmi_MI<-ppmi_1_complete[,c(291:300)]
Multiple Imputation
library(mi)
## Loading required package: stats4
## mi (Version 1.0, packaged: 2015-04-16 14:03:10 UTC; goodrich)
## mi Copyright (C) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Trustees of Columbia University
## This program comes with ABSOLUTELY NO WARRANTY.
## This is free software, and you are welcome to redistribute it
## under the General Public License version 2 or later.
## Execute RShowDoc('COPYING') for details.
set.seed(1414)
mdf<-missing_data.frame(ppmi_MI)
## NOTE: The following pairs of variables appear to have the same missingness pattern.
## Please verify whether they are in fact logically distinct variables.
## [,1] [,2]
## [1,] "COGDECLN" "FNCDTCOG"
## [2,] "COGDECLN" "COGDXCL"
## [3,] "FNCDTCOG" "COGDXCL"
summary(mdf)
## UPDRS_Part_I_Summary_Score_Baseline
## Min. : 0.00
## 1st Qu.: 0.00
## Median : 1.00
## Mean : 1.06
## 3rd Qu.: 2.00
## Max. :10.00
## NA's :2
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline
## Min. : 0.00
## 1st Qu.: 1.00
## Median : 3.00
## Mean : 4.41
## 3rd Qu.: 7.00
## Max. :25.00
## NA's :3
## UPDRS_Part_III_Summary_Score_Baseline
## Min. : 0
## 1st Qu.: 3
## Median :16
## Mean :15
## 3rd Qu.:23
## Max. :47
## NA's :2
## X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline
## Min. : 0.00
## 1st Qu.: 3.00
## Median : 6.00
## Mean : 6.02
## 3rd Qu.: 8.00
## Max. :19.00
## NA's :4
## X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline
## Min. : 1.00
## 1st Qu.: 5.00
## Median : 5.00
## Mean : 5.23
## 3rd Qu.: 6.00
## Max. :15.00
## NA's :1
## COGSTATE COGDECLN FNCDTCOG
## 1930 : 0 No :374 No :399
## 1941 : 0 Right: 0 Yes : 9
## 1955 : 0 Yes : 34 NA's: 15
## Dementia (PDD) : 5 NA's : 15
## Mild Cognitive Impairment (PD-MCI): 52
## Normal Cognition (PD-NC) :350
## NA's : 16
## COGDXCL EDUCYRS
## 10% - 49% : 5 Min. : 5.0
## 50% - 89% : 57 1st Qu.:14.0
## 90% - 100%:346 Median :16.0
## NA's : 15 Mean :16.1
## 3rd Qu.:18.0
## Max. :26.0
##
imputation<-mi(mdf)
complete<-complete(imputation)
c4<-complete[[4]] #extract complete dataset from it from imputed datasets.
# in this case, we randomly chose 4th.
c4<-c4[,1:10]
ppmi_1<-cbind(ppmi_1_complete[,1:290],c4,ppmi_1_complete[,301:302]) #complete dataset
ppmi_1_ROI<-ppmi_1[,c(1,23,24 ,28,29,33,34,38,39,143,144,148,149,282:302)] #complete dataset of regions of interest
write.csv(ppmi_1,"ppmi_complete_baseline.csv")
write.csv(ppmi_1_ROI,"ppmi_ROI_baseline.csv")
explore the distribution of baseline data
#categorical
col_n<-c(14,17:22,28:31)
#ppmi_1_ROI$APPRDX<-as.numeric(ppmi_1_ROI$APPRDX)
# frequency table
for (i in col_n){
print(colnames(ppmi_1_ROI[i]))
m<-table(ppmi_1_ROI[,i],ppmi_1_ROI[,33])
print(m)
print(fisher.test(m)$p.value)
}
## [1] "Sex"
##
## HC PD
## 1 84 193
## 2 39 107
## [1] 0.499
## [1] "chr12_rs34637584_GT"
##
## HC PD
## 0 123 296
## 1 0 4
## [1] 0.327
## [1] "chr17_rs11868035_GT"
##
## HC PD
## 0 67 142
## 1 40 127
## 2 16 31
## [1] 0.16
## [1] "chr17_rs11012_GT"
##
## HC PD
## 0 84 204
## 1 37 86
## 2 2 10
## [1] 0.716
## [1] "chr17_rs393152_GT"
##
## HC PD
## 0 77 186
## 1 40 99
## 2 6 15
## [1] 1
## [1] "chr17_rs12185268_GT"
##
## HC PD
## 0 80 189
## 1 39 98
## 2 4 13
## [1] 0.888
## [1] "chr17_rs199533_GT"
##
## HC PD
## 0 82 193
## 1 39 96
## 2 2 11
## [1] 0.598
## [1] "COGSTATE"
##
## HC PD
## Dementia (PDD) 0 5
## Mild Cognitive Impairment (PD-MCI) 1 51
## Normal Cognition (PD-NC) 122 244
## [1] 1e-07
## [1] "COGDECLN"
##
## HC PD
## No 122 267
## Yes 1 33
## [1] 0.000114
## [1] "FNCDTCOG"
##
## HC PD
## No 123 288
## Yes 0 12
## [1] 0.0221
## [1] "COGDXCL"
##
## HC PD
## 10% - 49% 0 5
## 50% - 89% 5 52
## 90% - 100% 118 243
## [1] 8.04e-05
for (i in col_n){
print(colnames(ppmi_1_ROI[i]))
m<-table(ppmi_1_ROI[,i],ppmi_1_ROI[,34])
print(m)
print(fisher.test(m)$p.value)
}
## [1] "Sex"
##
## psychiatric Healthy Control Parkinson's disease SWEDD
## 1 0 84 170 23
## 2 0 39 93 14
## [1] 0.711
## [1] "chr12_rs34637584_GT"
##
## psychiatric Healthy Control Parkinson's disease SWEDD
## 0 0 123 259 37
## 1 0 0 4 0
## [1] 0.523
## [1] "chr17_rs11868035_GT"
##
## psychiatric Healthy Control Parkinson's disease SWEDD
## 0 0 67 127 15
## 1 0 40 107 20
## 2 0 16 29 2
## [1] 0.189
## [1] "chr17_rs11012_GT"
##
## psychiatric Healthy Control Parkinson's disease SWEDD
## 0 0 84 176 28
## 1 0 37 78 8
## 2 0 2 9 1
## [1] 0.733
## [1] "chr17_rs393152_GT"
##
## psychiatric Healthy Control Parkinson's disease SWEDD
## 0 0 77 160 26
## 1 0 40 89 10
## 2 0 6 14 1
## [1] 0.905
## [1] "chr17_rs12185268_GT"
##
## psychiatric Healthy Control Parkinson's disease SWEDD
## 0 0 80 163 26
## 1 0 39 88 10
## 2 0 4 12 1
## [1] 0.906
## [1] "chr17_rs199533_GT"
##
## psychiatric Healthy Control Parkinson's disease SWEDD
## 0 0 82 167 26
## 1 0 39 86 10
## 2 0 2 10 1
## [1] 0.779
## [1] "COGSTATE"
##
## psychiatric Healthy Control
## Dementia (PDD) 0 0
## Mild Cognitive Impairment (PD-MCI) 0 1
## Normal Cognition (PD-NC) 0 122
##
## Parkinson's disease SWEDD
## Dementia (PDD) 5 0
## Mild Cognitive Impairment (PD-MCI) 40 11
## Normal Cognition (PD-NC) 218 26
## [1] 7.6e-08
## [1] "COGDECLN"
##
## psychiatric Healthy Control Parkinson's disease SWEDD
## No 0 122 235 32
## Yes 0 1 28 5
## [1] 0.000223
## [1] "FNCDTCOG"
##
## psychiatric Healthy Control Parkinson's disease SWEDD
## No 0 123 254 34
## Yes 0 0 9 3
## [1] 0.00897
## [1] "COGDXCL"
##
## psychiatric Healthy Control Parkinson's disease SWEDD
## 10% - 49% 0 0 5 0
## 50% - 89% 0 5 45 7
## 90% - 100% 0 118 213 30
## [1] 0.000547
#continuous
#normality test
col_n<-c(2:13,15,16,23:27,32)
aa<-matrix(NA,20,1)
k<-1
for(i in col_n){
aa[k,1]<-shapiro.test(ppmi_1_ROI[,i])$p.value
k<-k+1
}
k<-1
#for(i in col_n){
# x<-ppmi_1_ROI[ppmi_1_ROI$RECRUITMENT_CAT=="HC",i]
# y<-ppmi_1_ROI[ppmi_1_ROI$RECRUITMENT_CAT=="PD",i]
# aa[k,2]<-t.test(x,y)$p.value
# k<-k+1
#}
aa<-as.data.frame(aa)
rownames(aa)<-colnames(ppmi_1_ROI)[col_n]
colnames(aa)<-c("normality-test (>0.1 normal)")
print(aa)
## normality-test (>0.1 normal)
## L_caudate_ComputeArea 3.65e-10
## L_caudate_Volume 9.34e-12
## R_caudate_ComputeArea 5.56e-18
## R_caudate_Volume 8.53e-15
## L_putamen_ComputeArea 3.16e-08
## L_putamen_Volume 1.64e-07
## R_putamen_ComputeArea 5.34e-18
## R_putamen_Volume 9.21e-14
## L_superior_parietal_gyrus_ComputeArea 5.55e-15
## L_superior_parietal_gyrus_Volume 3.26e-12
## R_superior_parietal_gyrus_ComputeArea 9.98e-16
## R_superior_parietal_gyrus_Volume 5.92e-12
## Weight 7.37e-04
## Age 3.22e-03
## UPDRS_Part_I_Summary_Score_Baseline 4.81e-25
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline 2.25e-18
## UPDRS_Part_III_Summary_Score_Baseline 2.95e-24
## X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline 3.10e-10
## X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline 1.12e-19
## EDUCYRS 3.97e-08
write.csv(aa,"continuous_descriptive.csv")
model-based analysis
Logistic Regression
#logistic regression
for(i in c(2:13,15,16,23:27,32)){
ppmi_1_ROI[,i]<-scale(ppmi_1_ROI[,i])
}
ppmi_1_ROI<-as.data.frame(ppmi_1_ROI)
levels(ppmi_1_ROI$RECRUITMENT_CAT)<-c(0,1)
ppmi_1_ROI$RECRUITMENT_CAT<-as.numeric(ppmi_1_ROI$RECRUITMENT_CAT)
ppmi_1_ROI$RECRUITMENT_CAT<-ppmi_1_ROI$RECRUITMENT_CAT-1
logistic_ROI<-glm(RECRUITMENT_CAT~.,data=ppmi_1_ROI[,-c(1,34)],maxit=50,family="binomial")
library(MASS)
stepp<-stepAIC(logistic_ROI,trace=FALSE) #stepwise logistic regression
summary(logistic_ROI)
##
## Call:
## glm(formula = RECRUITMENT_CAT ~ ., family = "binomial", data = ppmi_1_ROI[,
## -c(1, 34)], maxit = 50)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.669 -0.147 0.001 0.075 1.910
##
## Coefficients:
## Estimate
## (Intercept) 1.71e+01
## L_caudate_ComputeArea 1.28e+00
## L_caudate_Volume -3.71e-01
## R_caudate_ComputeArea -1.99e+00
## R_caudate_Volume 6.75e-01
## L_putamen_ComputeArea -4.27e-01
## L_putamen_Volume 9.21e-01
## R_putamen_ComputeArea 4.31e-01
## R_putamen_Volume -5.46e-01
## L_superior_parietal_gyrus_ComputeArea 2.19e+00
## L_superior_parietal_gyrus_Volume -1.39e+00
## R_superior_parietal_gyrus_ComputeArea -6.42e-01
## R_superior_parietal_gyrus_Volume 1.10e-01
## Sex -2.54e-01
## Weight -2.29e-01
## Age -2.92e-01
## chr12_rs34637584_GT 1.67e+01
## chr17_rs11868035_GT 7.71e-02
## chr17_rs11012_GT 6.12e-01
## chr17_rs393152_GT -1.39e+00
## chr17_rs12185268_GT 9.54e-01
## chr17_rs199533_GT -6.71e-02
## UPDRS_Part_I_Summary_Score_Baseline 6.22e-01
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline 7.59e+00
## UPDRS_Part_III_Summary_Score_Baseline 1.74e+00
## X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline -2.54e-01
## X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline 5.76e-03
## COGSTATEMild Cognitive Impairment (PD-MCI) 4.59e+00
## COGSTATENormal Cognition (PD-NC) 1.11e+00
## COGDECLNYes 3.33e+00
## FNCDTCOGYes 1.26e+01
## COGDXCL50% - 89% -1.32e+01
## COGDXCL90% - 100% -1.17e+01
## EDUCYRS -1.41e-01
## Std. Error
## (Intercept) 4.41e+03
## L_caudate_ComputeArea 1.77e+00
## L_caudate_Volume 1.64e+00
## R_caudate_ComputeArea 1.97e+00
## R_caudate_Volume 1.81e+00
## L_putamen_ComputeArea 8.98e-01
## L_putamen_Volume 1.03e+00
## R_putamen_ComputeArea 1.59e+00
## R_putamen_Volume 1.58e+00
## L_superior_parietal_gyrus_ComputeArea 1.53e+00
## L_superior_parietal_gyrus_Volume 1.57e+00
## R_superior_parietal_gyrus_ComputeArea 1.30e+00
## R_superior_parietal_gyrus_Volume 1.37e+00
## Sex 6.15e-01
## Weight 2.70e-01
## Age 2.56e-01
## chr12_rs34637584_GT 3.62e+03
## chr17_rs11868035_GT 3.58e-01
## chr17_rs11012_GT 8.83e-01
## chr17_rs393152_GT 7.90e-01
## chr17_rs12185268_GT 1.24e+00
## chr17_rs199533_GT 9.76e-01
## UPDRS_Part_I_Summary_Score_Baseline 3.94e-01
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline 1.33e+00
## UPDRS_Part_III_Summary_Score_Baseline 4.92e-01
## X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline 2.98e-01
## X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline 2.25e-01
## COGSTATEMild Cognitive Impairment (PD-MCI) 2.75e+03
## COGSTATENormal Cognition (PD-NC) 2.75e+03
## COGDECLNYes 2.19e+00
## FNCDTCOGYes 1.79e+03
## COGDXCL50% - 89% 3.45e+03
## COGDXCL90% - 100% 3.45e+03
## EDUCYRS 2.40e-01
## z value
## (Intercept) 0.00
## L_caudate_ComputeArea 0.72
## L_caudate_Volume -0.23
## R_caudate_ComputeArea -1.01
## R_caudate_Volume 0.37
## L_putamen_ComputeArea -0.48
## L_putamen_Volume 0.90
## R_putamen_ComputeArea 0.27
## R_putamen_Volume -0.35
## L_superior_parietal_gyrus_ComputeArea 1.43
## L_superior_parietal_gyrus_Volume -0.89
## R_superior_parietal_gyrus_ComputeArea -0.49
## R_superior_parietal_gyrus_Volume 0.08
## Sex -0.41
## Weight -0.85
## Age -1.14
## chr12_rs34637584_GT 0.00
## chr17_rs11868035_GT 0.22
## chr17_rs11012_GT 0.69
## chr17_rs393152_GT -1.76
## chr17_rs12185268_GT 0.77
## chr17_rs199533_GT -0.07
## UPDRS_Part_I_Summary_Score_Baseline 1.58
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline 5.69
## UPDRS_Part_III_Summary_Score_Baseline 3.54
## X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline -0.85
## X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline 0.03
## COGSTATEMild Cognitive Impairment (PD-MCI) 0.00
## COGSTATENormal Cognition (PD-NC) 0.00
## COGDECLNYes 1.52
## FNCDTCOGYes 0.01
## COGDXCL50% - 89% 0.00
## COGDXCL90% - 100% 0.00
## EDUCYRS -0.59
## Pr(>|z|)
## (Intercept) 0.99692
## L_caudate_ComputeArea 0.46936
## L_caudate_Volume 0.82124
## R_caudate_ComputeArea 0.31426
## R_caudate_Volume 0.70948
## L_putamen_ComputeArea 0.63453
## L_putamen_Volume 0.36976
## R_putamen_ComputeArea 0.78679
## R_putamen_Volume 0.72889
## L_superior_parietal_gyrus_ComputeArea 0.15192
## L_superior_parietal_gyrus_Volume 0.37614
## R_superior_parietal_gyrus_ComputeArea 0.62105
## R_superior_parietal_gyrus_Volume 0.93613
## Sex 0.67939
## Weight 0.39717
## Age 0.25433
## chr12_rs34637584_GT 0.99632
## chr17_rs11868035_GT 0.82968
## chr17_rs11012_GT 0.48804
## chr17_rs393152_GT 0.07762
## chr17_rs12185268_GT 0.44316
## chr17_rs199533_GT 0.94519
## UPDRS_Part_I_Summary_Score_Baseline 0.11463
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline 1.2e-08
## UPDRS_Part_III_Summary_Score_Baseline 0.00041
## X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline 0.39392
## X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline 0.97955
## COGSTATEMild Cognitive Impairment (PD-MCI) 0.99867
## COGSTATENormal Cognition (PD-NC) 0.99968
## COGDECLNYes 0.12861
## FNCDTCOGYes 0.99438
## COGDXCL50% - 89% 0.99694
## COGDXCL90% - 100% 0.99729
## EDUCYRS 0.55634
##
## (Intercept)
## L_caudate_ComputeArea
## L_caudate_Volume
## R_caudate_ComputeArea
## R_caudate_Volume
## L_putamen_ComputeArea
## L_putamen_Volume
## R_putamen_ComputeArea
## R_putamen_Volume
## L_superior_parietal_gyrus_ComputeArea
## L_superior_parietal_gyrus_Volume
## R_superior_parietal_gyrus_ComputeArea
## R_superior_parietal_gyrus_Volume
## Sex
## Weight
## Age
## chr12_rs34637584_GT
## chr17_rs11868035_GT
## chr17_rs11012_GT
## chr17_rs393152_GT .
## chr17_rs12185268_GT
## chr17_rs199533_GT
## UPDRS_Part_I_Summary_Score_Baseline
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline ***
## UPDRS_Part_III_Summary_Score_Baseline ***
## X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline
## X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline
## COGSTATEMild Cognitive Impairment (PD-MCI)
## COGSTATENormal Cognition (PD-NC)
## COGDECLNYes
## FNCDTCOGYes
## COGDXCL50% - 89%
## COGDXCL90% - 100%
## EDUCYRS
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 510.01 on 422 degrees of freedom
## Residual deviance: 137.64 on 389 degrees of freedom
## AIC: 205.6
##
## Number of Fisher Scoring iterations: 18
summary(stepp)
##
## Call:
## glm(formula = RECRUITMENT_CAT ~ L_caudate_ComputeArea + R_caudate_ComputeArea +
## L_superior_parietal_gyrus_ComputeArea + L_superior_parietal_gyrus_Volume +
## Age + chr17_rs393152_GT + chr17_rs12185268_GT + UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline +
## UPDRS_Part_III_Summary_Score_Baseline + COGSTATE, family = "binomial",
## data = ppmi_1_ROI[, -c(1, 34)], maxit = 50)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -6.647 -0.192 0.003 0.087 1.774
##
## Coefficients:
## Estimate
## (Intercept) 15.844
## L_caudate_ComputeArea 1.086
## R_caudate_ComputeArea -1.075
## L_superior_parietal_gyrus_ComputeArea 1.939
## L_superior_parietal_gyrus_Volume -1.669
## Age -0.439
## chr17_rs393152_GT -1.076
## chr17_rs12185268_GT 1.293
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline 6.766
## UPDRS_Part_III_Summary_Score_Baseline 1.919
## COGSTATEMild Cognitive Impairment (PD-MCI) -6.212
## COGSTATENormal Cognition (PD-NC) -10.460
## Std. Error
## (Intercept) 1214.607
## L_caudate_ComputeArea 0.470
## R_caudate_ComputeArea 0.495
## L_superior_parietal_gyrus_ComputeArea 1.128
## L_superior_parietal_gyrus_Volume 1.152
## Age 0.222
## chr17_rs393152_GT 0.673
## chr17_rs12185268_GT 0.783
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline 1.145
## UPDRS_Part_III_Summary_Score_Baseline 0.445
## COGSTATEMild Cognitive Impairment (PD-MCI) 1214.608
## COGSTATENormal Cognition (PD-NC) 1214.607
## z value
## (Intercept) 0.01
## L_caudate_ComputeArea 2.31
## R_caudate_ComputeArea -2.17
## L_superior_parietal_gyrus_ComputeArea 1.72
## L_superior_parietal_gyrus_Volume -1.45
## Age -1.98
## chr17_rs393152_GT -1.60
## chr17_rs12185268_GT 1.65
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline 5.91
## UPDRS_Part_III_Summary_Score_Baseline 4.31
## COGSTATEMild Cognitive Impairment (PD-MCI) -0.01
## COGSTATENormal Cognition (PD-NC) -0.01
## Pr(>|z|)
## (Intercept) 0.990
## L_caudate_ComputeArea 0.021 *
## R_caudate_ComputeArea 0.030 *
## L_superior_parietal_gyrus_ComputeArea 0.086 .
## L_superior_parietal_gyrus_Volume 0.147
## Age 0.048 *
## chr17_rs393152_GT 0.110
## chr17_rs12185268_GT 0.098 .
## UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline 3.5e-09 ***
## UPDRS_Part_III_Summary_Score_Baseline 1.6e-05 ***
## COGSTATEMild Cognitive Impairment (PD-MCI) 0.996
## COGSTATENormal Cognition (PD-NC) 0.993
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 510.01 on 422 degrees of freedom
## Residual deviance: 145.16 on 411 degrees of freedom
## AIC: 169.2
##
## Number of Fisher Scoring iterations: 16
Create Complete dataset with idea disease categories
ppmi_1_PD1<-ppmi_1[ppmi_1$APPRDX!="psychiatric",]
ppmi_1_PD1<-ppmi_1[ppmi_1$APPRDX!="SWEDD",]
ppmi_1_PD1<-ppmi_1_PD1[,-c(1,302)]
ppmi_1_PD2<-ppmi_1[ppmi_1$APPRDX!="psychiatric",]
ppmi_1_PD2<-ppmi_1[,-c(1,302)]
ppmi_1_PD1$PD1<-ifelse(ppmi_1_PD1$RECRUITMENT_CAT=="HC",1,0)
ppmi_1_PD1<-ppmi_1_PD1[,-300]
ppmi_1_PD1$PD1<-as.factor(ppmi_1_PD1$PD1)
Balance cases
#PD1=PD PD2=PD+SWEDD
set.seed(623)
library(unbalanced)
## Loading required package: mlr
## Loading required package: BBmisc
## Loading required package: ggplot2
## Loading required package: ParamHelpers
## Loading required package: stringi
## Loading required package: foreach
## Loading required package: doParallel
## Loading required package: iterators
## Loading required package: parallel
n<-ncol(ppmi_1_PD1)
output<-ppmi_1_PD1$PD1
input<-ppmi_1_PD1[ ,-n]
#balance the dataset
data<-ubBalance(X= input, Y=output,type="ubSMOTE", verbose=TRUE)
## Proportion of positives after ubSMOTE : 42.9 % of 861 observations
balancedData<-cbind(data$X,data$Y)
ppmi_1_PD1_balanced<-balancedData
colnames(ppmi_1_PD1_balanced)<-colnames(ppmi_1_PD1)
ppmi_1_PD2$PD2<-ifelse(ppmi_1_PD2$RECRUITMENT_CAT=="HC",1,0)
ppmi_1_PD2<-ppmi_1_PD2[,-300]
ppmi_1_PD2$PD2<-as.factor(ppmi_1_PD2$PD2)
#balance cases
set.seed(623)
require(unbalanced)
n<-ncol(ppmi_1_PD2)
output<-ppmi_1_PD2$PD2
input<-ppmi_1_PD2[ ,-n]
#balance the dataset
data<-ubBalance(X= input, Y=output,type="ubSMOTE", verbose=TRUE)
## Proportion of positives after ubSMOTE : 42.9 % of 861 observations
balancedData<-cbind(data$X,data$Y)
ppmi_1_PD2_balanced<-balancedData
colnames(ppmi_1_PD2_balanced)<-colnames(ppmi_1_PD2)
#numerize the 4 datasets
for(i in 295:298){
ppmi_1_PD1[,i]<-as.numeric(ppmi_1_PD1[,i])
}
for(i in 295:298){
ppmi_1_PD2[,i]<-as.numeric(ppmi_1_PD2[,i])
}
for(i in 295:298){
ppmi_1_PD1_balanced[,i]<-as.numeric(ppmi_1_PD1_balanced[,i])
}
for(i in 295:298){
ppmi_1_PD2_balanced[,i]<-as.numeric(ppmi_1_PD2_balanced[,i])
}
write.csv(ppmi_1_PD1,"PD1_un.csv")
write.csv(ppmi_1_PD2,"PD2_un.csv")
write.csv(ppmi_1_PD1_balanced,"PD1_b.csv")
write.csv(ppmi_1_PD2_balanced,"PD2_b.csv")
Prediction Functions
library(crossval)
##
## Attaching package: 'crossval'
## The following object is masked from 'package:mlr':
##
## crossval
#adaboost
library(ada)
## Loading required package: rpart
library(rpart)
my.ada <- function (train.x, train.y, test.x, test.y, negative, formula){
ada.fit <- ada(train.x, train.y)
predict.y <- predict(ada.fit, test.x)
#count TP,NP..
out <- confusionMatrix(test.y, predict.y, negative = negative)
return (out)
}
# svm
library(e1071)
##
## Attaching package: 'e1071'
## The following object is masked from 'package:mlr':
##
## impute
my.svm <- function (train.x, train.y, test.x, test.y, negative, formula){
svm.fit <- svm(train.x, train.y)
predict.y <- predict(svm.fit, test.x)
#count TP,NP..
out <- confusionMatrix(test.y, predict.y, negative = negative)
return (out)
}
# naive bayesian
my.nb <- function (train.x, train.y, test.x, test.y, negative, formula){
nb.fit <- naiveBayes(train.x, train.y)
predict.y <- predict(nb.fit, test.x)
#count TP,NP..
out <- confusionMatrix(test.y, predict.y, negative = negative)
return (out)
}
# decision trees
library(rpart)
my.detree <- function (train.x, train.y, test.x, test.y, negative, formula){
train.matrix<-cbind(train.x,train.y)
detree.fit <- rpart(train.y~.,method="class",data=train.matrix)
predict.y <- predict(detree.fit, test.x)
out <- confusionMatrix(test.y, predict.y, negative = negative)
return (out)
}
#k-Nearest Neighbor Classiï¬cation
library("class")
my.knn <- function (train.x, train.y, test.x, test.y, negative, formula){
train.matrix<-cbind(train.x,train.y)
test.matrix<-cbind(test.x,test.y)
predict.y <- knn(train=train.matrix,test=test.matrix,cl=train.y,k=2)
out <- confusionMatrix(test.y, predict.y, negative = negative)
return (out)
}
# Kmeans
my.kmeans <- function (train.x, train.y, test.x, test.y, negative, formula){
train.matrix<-cbind(train.x,train.y)
test.matrix<-cbind(test.x,test.y)
kmeans.fit <- kmeans(train.x,2)
predict.y <- kmeans.fit$cluster-1
out <- confusionMatrix(test.y,predict.y, negative = negative)
return (out)
}
function dealt with complete dataset
classfi<-function(a=my.ada){
a1<-matrix(NA,4,4)
a2<-matrix(NA,4,6)
#PD1 unbalanced
X <- ppmi_1_PD1[, 1:299]
Y <- ppmi_1_PD1[,300]
form <- "train.y~."
neg <- 0
set.seed(714)
cv.out <- crossval(a, X, Y, K = 5, B = 1, negative = neg, formula = form)
out <- diagnosticErrors(cv.out$stat)
a1[1,]<-cv.out$stat
a2[1,]<-out
#PD2 unbalanced
X <- ppmi_1_PD2[, 1:299]
Y <- ppmi_1_PD2[,300]
form <- "train.y~."
neg <- 0
set.seed(714)
cv.out <- crossval(a, X, Y, K = 5, B = 1, negative = neg, formula = form)
out <- diagnosticErrors(cv.out$stat)
a1[2,]<-cv.out$stat
a2[2,]<-out
#PD1 balanced
X <- ppmi_1_PD1_balanced[, 1:299]
Y <- ppmi_1_PD1_balanced[,300]
form <- "train.y~."
neg <- 0
set.seed(714)
cv.out <- crossval(a, X, Y, K = 5, B = 1, negative = neg, formula = form)
out <- diagnosticErrors(cv.out$stat)
a1[3,]<-cv.out$stat
a2[3,]<-out
#PD2 balanced
X <- ppmi_1_PD2_balanced[, 1:299]
Y <- ppmi_1_PD2_balanced[,300]
form <- "train.y~."
neg <- 0
set.seed(714)
cv.out <- crossval(a, X, Y, K = 5, B = 1, negative = neg, formula = form)
out <- diagnosticErrors(cv.out$stat)
a1[4,]<-cv.out$stat
a2[4,]<-out
classification<-as.data.frame(cbind(a1,a2))
classification$Group<-c("PD1_un","PD2_un","PD1_b","PD2_b")
colnames(classification)[1:10]<-c("FP","TP","TN","FN","acc","sens","spec","ppv","npv","lor")
write.csv(classification,"classification.csv")
print(classification)
}
Classification result
adaboost
classfi()
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## FP TP TN FN acc sens spec ppv npv lor Group
## 1 0.6 22.6 52.0 2.0 0.966 0.919 0.989 0.974 0.963 6.89 PD1_un
## 2 0.6 22.0 59.4 2.6 0.962 0.894 0.990 0.973 0.958 6.73 PD2_un
## 3 0.8 71.6 97.6 2.2 0.983 0.970 0.992 0.989 0.978 8.29 PD1_b
## 4 2.0 71.8 96.4 2.0 0.977 0.973 0.980 0.973 0.980 7.46 PD2_b
decision tree
classfi(a=my.detree)
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## FP TP TN FN acc sens spec ppv npv lor Group
## 1 104 44.4 1.0 4.8 0.294 0.902 0.00951 0.299 0.172 -2.422 PD1_un
## 2 120 49.2 0.0 0.0 0.291 1.000 0.00000 0.291 NaN NaN PD2_un
## 3 195 146.8 1.4 0.8 0.430 0.995 0.00711 0.429 0.636 0.274 PD1_b
## 4 196 146.4 0.4 1.2 0.426 0.992 0.00203 0.427 0.250 -1.392 PD2_b
naive bayes
classfi(a=my.nb)
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## FP TP TN FN acc sens spec ppv npv lor Group
## 1 10.6 9.2 42.0 15.4 0.663 0.374 0.798 0.465 0.732 0.862 PD1_un
## 2 15.4 12.4 44.6 12.2 0.674 0.504 0.743 0.446 0.785 1.080 PD2_un
## 3 45.8 64.4 52.6 9.4 0.679 0.873 0.535 0.584 0.848 2.063 PD1_b
## 4 44.8 65.0 53.6 8.8 0.689 0.881 0.545 0.592 0.859 2.179 PD2_b
svm
classfi(a=my.svm)
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## FP TP TN FN acc sens spec ppv npv lor Group
## 1 0.4 4.8 52.2 19.8 0.738 0.1951 0.992 0.923 0.725 3.45 PD1_un
## 2 0.6 1.4 59.4 23.2 0.719 0.0569 0.990 0.700 0.719 1.79 PD2_un
## 3 2.0 69.2 96.4 4.6 0.962 0.9377 0.980 0.972 0.954 6.59 PD1_b
## 4 2.8 67.0 95.6 6.8 0.944 0.9079 0.972 0.960 0.934 5.82 PD2_b
knn
classfi(a=my.knn)
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## FP TP TN FN acc sens spec ppv npv lor Group
## 1 15.2 7.6 37.4 17.0 0.583 0.309 0.711 0.333 0.688 0.0953 PD1_un
## 2 17.2 7.2 42.8 17.4 0.591 0.293 0.713 0.295 0.711 0.0292 PD2_un
## 3 17.8 55.2 80.6 18.6 0.789 0.748 0.819 0.756 0.812 2.5981 PD1_b
## 4 20.4 56.2 78.0 17.6 0.779 0.762 0.793 0.734 0.816 2.5022 PD2_b
kmeans
classfi(a=my.kmeans)
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## FP TP TN FN acc sens spec ppv npv lor Group
## 1 87.6 38.4 124 58.4 0.527 0.397 0.587 0.305 0.681 -0.0685 PD1_un
## 2 114.2 44.6 127 52.6 0.507 0.459 0.527 0.281 0.707 -0.0587 PD2_un
## 3 163.6 126.0 232 167.6 0.519 0.429 0.586 0.435 0.580 0.0623 PD1_b
## 4 165.4 124.2 230 169.4 0.514 0.423 0.581 0.429 0.576 0.0185 PD2_b
function dealt with dataset without UPDRS (and clinical features)
classfi1<-function(a=my.ada){
a1<-matrix(NA,4,4)
a2<-matrix(NA,4,6)
#PD1 unbalanced
X <- ppmi_1_PD1[, 1:289]
Y <- ppmi_1_PD1[,300]
form <- "train.y~."
neg <- 0
set.seed(714)
cv.out <- crossval(a, X, Y, K = 5, B = 1, negative = neg, formula = form)
out <- diagnosticErrors(cv.out$stat)
a1[1,]<-cv.out$stat
a2[1,]<-out
#PD2 unbalanced
X <- ppmi_1_PD2[, 1:289]
Y <- ppmi_1_PD2[,300]
form <- "train.y~."
neg <- 0
set.seed(714)
cv.out <- crossval(a, X, Y, K = 5, B = 1, negative = neg, formula = form)
out <- diagnosticErrors(cv.out$stat)
a1[2,]<-cv.out$stat
a2[2,]<-out
#PD1 balanced
X <- ppmi_1_PD1_balanced[, 1:289]
Y <- ppmi_1_PD1_balanced[,300]
form <- "train.y~."
neg <- 0
set.seed(714)
cv.out <- crossval(a, X, Y, K = 5, B = 1, negative = neg, formula = form)
out <- diagnosticErrors(cv.out$stat)
a1[3,]<-cv.out$stat
a2[3,]<-out
#PD2 balanced
X <- ppmi_1_PD2_balanced[, 1:289]
Y <- ppmi_1_PD2_balanced[,300]
form <- "train.y~."
neg <- 0
set.seed(714)
cv.out <- crossval(a, X, Y, K = 5, B = 1, negative = neg, formula = form)
out <- diagnosticErrors(cv.out$stat)
a1[4,]<-cv.out$stat
a2[4,]<-out
classification<-cbind(a1,a2)
colnames(classification)[1:10]<-c("FP","TP","TN","FN","acc","sens","spec","ppv","npv","lor")
write.csv(classification,"classification2.csv")
print(classification)
}
adaboost
classfi1()
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## FP TP TN FN acc sens spec ppv npv lor
## [1,] 4.0 4.0 48.6 20.6 0.681 0.1626 0.924 0.500 0.702 0.858
## [2,] 4.2 2.0 55.8 22.6 0.683 0.0813 0.930 0.323 0.712 0.162
## [3,] 2.8 58.0 95.6 15.8 0.892 0.7859 0.972 0.954 0.858 4.831
## [4,] 4.6 55.8 93.8 18.0 0.869 0.7561 0.953 0.924 0.839 4.147
decision tree
classfi1(a=my.detree)
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## FP TP TN FN acc sens spec ppv npv lor
## [1,] 100 47.4 5.0 1.8 0.339 0.963 0.0475 0.321 0.735 0.2731
## [2,] 115 48.2 5.0 1.0 0.314 0.980 0.0417 0.295 0.833 0.7399
## [3,] 187 139.8 9.8 7.8 0.434 0.947 0.0498 0.428 0.557 -0.0626
## [4,] 189 139.6 7.4 8.0 0.427 0.946 0.0376 0.424 0.481 -0.3830
naive bayes
classfi1(a=my.nb)
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## FP TP TN FN acc sens spec ppv npv lor
## [1,] 13.0 7.8 39.6 16.8 0.614 0.317 0.753 0.375 0.702 0.347
## [2,] 16.6 8.8 43.4 15.8 0.617 0.358 0.723 0.346 0.733 0.376
## [3,] 72.4 64.2 26.0 9.6 0.524 0.870 0.264 0.470 0.730 0.876
## [4,] 70.6 64.6 27.8 9.2 0.537 0.875 0.283 0.478 0.751 1.017
svm
classfi1(a=my.svm)
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## FP TP TN FN acc sens spec ppv npv lor
## [1,] 0.2 0.4 52.4 24.2 0.684 0.01626 0.996 0.667 0.684 1.47
## [2,] 0.4 0.2 59.6 24.4 0.707 0.00813 0.993 0.333 0.710 0.20
## [3,] 4.8 53.0 93.6 20.8 0.851 0.71816 0.951 0.917 0.818 3.91
## [4,] 4.2 50.4 94.2 23.4 0.840 0.68293 0.957 0.923 0.801 3.88
knn
classfi1(a=my.knn)
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## FP TP TN FN acc sens spec ppv npv lor
## [1,] 15.2 7.6 37.4 17.0 0.583 0.309 0.711 0.333 0.688 0.0953
## [2,] 17.2 7.2 42.8 17.4 0.591 0.293 0.713 0.295 0.711 0.0292
## [3,] 17.8 55.2 80.6 18.6 0.789 0.748 0.819 0.756 0.812 2.5981
## [4,] 20.4 56.2 78.0 17.6 0.779 0.762 0.793 0.734 0.816 2.5022
kmeans
classfi1(a=my.kmeans)
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## Number of folds: 5
## Total number of CV fits: 5
##
## Round # 1 of 1
## CV Fit # 1 of 5
## CV Fit # 2 of 5
## CV Fit # 3 of 5
## CV Fit # 4 of 5
## CV Fit # 5 of 5
## FP TP TN FN acc sens spec ppv npv lor
## [1,] 87.6 38.4 124 58.4 0.527 0.397 0.587 0.305 0.681 -0.0685
## [2,] 114.2 44.6 127 52.6 0.507 0.459 0.527 0.281 0.707 -0.0587
## [3,] 163.6 126.0 232 167.6 0.519 0.429 0.586 0.435 0.580 0.0623
## [4,] 165.4 124.2 230 169.4 0.514 0.423 0.581 0.429 0.576 0.0185
adaboost scores and results
# Varplot, table S2 and table S3
require(ada)
#unbalanced PD1
set.seed(729)
c1<-ada(PD1~.,data=ppmi_1_PD1)

n_c<-ncol(ppmi_1_PD1)-1
PD1_score<-varplot(c1,type="scores",max.var.show=n_c)

top30<-varplot(c1,type="scores")

data_top30<-cbind(names(top30),"PD1","unBalanced")
write.csv(data_top30,"top30.csv")
#unbalanced PD2
set.seed(729)
c1<-ada(PD2~.,data=ppmi_1_PD2)

n_c<-ncol(ppmi_1_PD2)-1
PD2_score<-varplot(c1,type="scores",max.var.show=n_c)

top30<-varplot(c1,type="scores")

data_top30<-cbind(names(top30),"PD2","unBalanced")
write.csv(data_top30,"top30.csv")
#balanced PD1
set.seed(729)
c1<-ada(PD1~.,data=ppmi_1_PD1_balanced)

n_c<-ncol(ppmi_1_PD1_balanced)-1
PD1_score_b<-varplot(c1,type="scores",max.var.show=n_c)

top30<-varplot(c1,type="scores")

data_top30<-cbind(names(top30),"PD1","Balanced")
write.csv(data_top30,"top30.csv")
#balanced PD2
set.seed(729)
c1<-ada(PD2~.,data=ppmi_1_PD2_balanced)

n_c<-ncol(ppmi_1_PD2_balanced)-1
PD2_score_b<-varplot(c1,type="scores",max.var.show=n_c)

top30<-varplot(c1,type="scores")

data_top30<-cbind(names(top30),"PD2","Balanced")
write.csv(data_top30,"top30.csv")
write.csv(PD1_score,"trial1.csv")
write.csv(PD2_score,"trial2.csv")
write.csv(PD1_score_b,"trial3.csv")
write.csv(PD2_score_b,"trial4.csv")
########
without UPDRS
#unbalanced PD1
set.seed(729)
c1<-ada(PD1~.,data=ppmi_1_PD1[,-c(290:294)])

n_c<-ncol(ppmi_1_PD1[,-c(290:294)])-1
PD1_score_noUPDRS<-varplot(c1,type="scores",max.var.show=n_c)

top30<-varplot(c1,type="scores")

data_top30<-cbind(names(top30),"PD1","unBalanced")
write.csv(data_top30,"top30.csv")
#unbalanced PD2
set.seed(729)
c1<-ada(PD2~.,data=ppmi_1_PD2[,-c(290:294)])

n_c<-ncol(ppmi_1_PD2[,-c(290:294)])-1
PD2_score_noUPDRS<-varplot(c1,type="scores",max.var.show=n_c)

top30<-varplot(c1,type="scores")

data_top30<-cbind(names(top30),"PD2","unBalanced")
write.csv(data_top30,"top30.csv")
#balanced PD1
set.seed(729)
c1<-ada(PD1~.,data=ppmi_1_PD1_balanced[,-c(290:294)])

n_c<-ncol(ppmi_1_PD1_balanced[,-c(290:294)])-1
PD1_score_b_noUPDRS<-varplot(c1,type="scores",max.var.show=n_c)

top30<-varplot(c1,type="scores")

data_top30<-cbind(names(top30),"PD1","Balanced")
write.csv(data_top30,"top30.csv")
#balanced PD2
set.seed(729)
c1<-ada(PD2~.,data=ppmi_1_PD2_balanced[,-c(290:294)])

n_c<-ncol(ppmi_1_PD2_balanced[,-c(290:294)])-1
PD2_score_b_noUPDRS<-varplot(c1,type="scores",max.var.show=n_c)

top30<-varplot(c1,type="scores")

data_top30<-cbind(names(top30),"PD2","Balanced")
write.csv(data_top30,"top30.csv")
write.csv(PD1_score_noUPDRS,"trial5.csv")
write.csv(PD2_score_noUPDRS,"trial6.csv")
write.csv(PD1_score_b_noUPDRS,"trial7.csv")
write.csv(PD2_score_b_noUPDRS,"trial8.csv")